/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2005 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2015      Los Alamos National Security, LLC. All rights
 *                         reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
/**
 * @file
 *
 * Snapshot Coordination (SNAPC) Interface
 *
 * Terminology:
 * ------------
 *  Global Snapshot Coordinator:
 *     - HNP(s) coordination function.
 *  Local Snapshot Coordinator
 *     - VHNP(s) [e.g., orted] coordination function
 *  Application Snapshot Coordinator
 *     - Application level coordination function
 *  Local Snapshot
 *     - Snapshot generated by a single process in the parallel job
 *  Local Snapshot Reference
 *     - A generic reference to the physical Local Snapshot
 *  Global Snapshot
 *     - Snapshot generated for the entire parallel job
 *  Global Snapshot Reference
 *     - A generic reference to the physical Global Snapshot
 *
 * General Description:
 * ---------------------
 * This framework is tasked with:
 * - Initiating the checkpoint in the system
 * - Physically moving the local snapshot files to a location
 *   Initially, this location is the node on which the Head Node Process (HNP)
 *   is running, but later this will be a replicated checkpoint server or
 *   the like.
 * - Generating a 'global snapshot handle' that the user can use to restart
 *   the parallel job.
 *
 * Each component will have 3 tiers of behavior that must behave in concert:
 *  - Global Snapshot Coordinator
 *    This is the HNPs tasks. Mostly distributing the notification of the
 *    checkpoint, and then compiling the physical and virtual nature of the
 *    global snapshot handle.
 *  - Local Snapshot Coordinator
 *    This is the VHNPs (or orted, if available) tasks. This will involve
 *    working with the Global Snapshot Coordinator to route the physical
 *    and virtual 'local snapshot's from the application to the desired
 *    location. This process must also notify the Global Snapshot Coordinator
 *    when its set of processes have completed the checkpoint.
 *  - Application Snapshot Coordinator
 *    This is the application level coordinator. This is very light, just
 *    a subscription to be triggered when it needs to checkpoint, and then,
 *    once finished with the checkpoint, notify the Local Snapshot Coordinator
 *    that it is complete.
 *    If there is no orted (so no bootproxy), then the application assumes the
 *    responsibility of the Local Snapshot Coordinator as well.
 *
 */

#ifndef MCA_SNAPC_H
#define MCA_SNAPC_H

#include "orte_config.h"
#include "orte/constants.h"
#include "orte/types.h"

#include "orte/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/mca/crs/crs.h"
#include "opal/mca/crs/base/base.h"

#include "opal/class/opal_object.h"
#include "opal/class/opal_pointer_array.h"
#include "opal/util/output.h"

#include "orte/mca/sstore/sstore.h"

BEGIN_C_DECLS

/**
 * States that a process can be in while checkpointing.
 *
 * The values are consecutive integers starting at 0, and
 * ORTE_SNAPC_CKPT_MAX is one past the largest state value.
 */
/* Reached an error */
#define ORTE_SNAPC_CKPT_STATE_ERROR           0

/* Doing no checkpoint -- Quiet state */
#define ORTE_SNAPC_CKPT_STATE_NONE            1
/* There has been a request for a checkpoint from one of the applications */
#define ORTE_SNAPC_CKPT_STATE_REQUEST         2
/* There is a pending checkpoint for this process */
#define ORTE_SNAPC_CKPT_STATE_PENDING         3
/* Running the checkpoint */
#define ORTE_SNAPC_CKPT_STATE_RUNNING         4
/* INC Prep Finished */
#define ORTE_SNAPC_CKPT_STATE_INC_PREPED      5
/* All processes have been stopped */
#define ORTE_SNAPC_CKPT_STATE_STOPPED         6
/* Finished the checkpoint locally */
#define ORTE_SNAPC_CKPT_STATE_FINISHED_LOCAL  7
/* Migrating */
#define ORTE_SNAPC_CKPT_STATE_MIGRATING       8
/* Finished establishing the checkpoint */
#define ORTE_SNAPC_CKPT_STATE_ESTABLISHED     9
/* Processes continuing or have been recovered (finished post-INC) */
#define ORTE_SNAPC_CKPT_STATE_RECOVERED      10
/* Unable to checkpoint this job */
#define ORTE_SNAPC_CKPT_STATE_NO_CKPT        11
/* Unable to restart this job */
#define ORTE_SNAPC_CKPT_STATE_NO_RESTART     12
/* Number of states defined above (one past the largest state value) */
#define ORTE_SNAPC_CKPT_MAX                  13

/**
 * Sufficiently high shift value to avoid colliding the process
 * checkpointing states above with the ORTE process states
 */
#define ORTE_SNAPC_CKPT_SHIFT                131072

/**
 * Uniquely encode the SNAPC state by adding ORTE_SNAPC_CKPT_SHIFT to it.
 *
 * The argument is parenthesized so that expressions with lower precedence
 * than '+' (e.g. a conditional or a bitwise operation) are encoded as a
 * whole rather than being split by the macro expansion.
 *
 * @param state One of the ORTE_SNAPC_CKPT_STATE_* values (any integer expression)
 * @return The shifted (encoded) state value
 */
#define ORTE_SNAPC_CKPT_NOTIFY(state) (ORTE_SNAPC_CKPT_SHIFT + (state))

/**
 * Decode the SNAPC state by subtracting ORTE_SNAPC_CKPT_SHIFT from it.
 * Inverse of ORTE_SNAPC_CKPT_NOTIFY().
 *
 * @param state An encoded state value (any integer expression)
 * @return The decoded state value
 */
#define ORTE_SNAPC_CKPT_STATE(state) ((state) - ORTE_SNAPC_CKPT_SHIFT)

/**
 * Check whether a state is a SNAPC state or not.
 *
 * @param state A state value (any integer expression)
 * @return Non-zero (1) if state >= ORTE_SNAPC_CKPT_SHIFT, 0 otherwise
 */
#define CHECK_ORTE_SNAPC_CKPT_STATE(state) ((state) >= ORTE_SNAPC_CKPT_SHIFT)

/**
 * Definition of an ORTE local snapshot.
 * Similar to the opal_crs_base_snapshot_t except that it
 * contains process contact information.
 *
 * The structure is exposed under two typedef names: the versioned
 * orte_snapc_base_local_snapshot_1_0_0_t and the unversioned
 * orte_snapc_base_local_snapshot_t.
 */
struct orte_snapc_base_local_snapshot_1_0_0_t {
    /** List super object (first member) */
    opal_list_item_t super;

    /** ORTE Process name */
    orte_process_name_t process_name;

    /** State of the checkpoint (presumably one of ORTE_SNAPC_CKPT_STATE_* -- confirm) */
    int state;

    /** Stable Storage Handle (must equal the global version) */
    orte_sstore_base_handle_t ss_handle;
};
typedef struct orte_snapc_base_local_snapshot_1_0_0_t orte_snapc_base_local_snapshot_1_0_0_t;
typedef struct orte_snapc_base_local_snapshot_1_0_0_t orte_snapc_base_local_snapshot_t;

/* Class declaration for the local snapshot object type */
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_snapc_base_local_snapshot_t);

/**
 * Definition of the global snapshot.
 * Each component is assumed to have extended this definition
 * in the same way they extend the orte_snapc_base_component_t below.
 *
 * The structure is exposed under two typedef names: the versioned
 * orte_snapc_base_global_snapshot_1_0_0_t and the unversioned
 * orte_snapc_base_global_snapshot_t.
 */
struct orte_snapc_base_global_snapshot_1_0_0_t {
    /** This is an object, so must have super */
    opal_list_item_t super;

    /** A list of local snapshots
     *  (presumably orte_snapc_base_local_snapshot_t items -- confirm) */
    opal_list_t local_snapshots;

    /** Checkpoint Options */
    opal_crs_base_ckpt_options_t *options;

    /** Stable Storage Handle */
    orte_sstore_base_handle_t ss_handle;
};
typedef struct orte_snapc_base_global_snapshot_1_0_0_t orte_snapc_base_global_snapshot_1_0_0_t;
typedef struct orte_snapc_base_global_snapshot_1_0_0_t orte_snapc_base_global_snapshot_t;

/* Class declaration for the global snapshot object type */
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_snapc_base_global_snapshot_t);

/**
 * Descriptor for a quiesce / checkpoint operation.
 *
 * A pointer to this structure is the sole argument of the
 * orte_snapc_base_start_checkpoint_fn_t and
 * orte_snapc_base_end_checkpoint_fn_t module functions declared
 * later in this file.
 */
struct orte_snapc_base_quiesce_1_0_0_t {
    /** Parent is an object type */
    opal_object_t super;

    /** Current epoch */
    int epoch;
    /** Requested CRS */
    char * crs_name;
    /** Handle for reference */
    char * handle;
    /** Global snapshot associated with this operation */
    orte_snapc_base_global_snapshot_t *snapshot;

    /** Stable Storage Handle */
    orte_sstore_base_handle_t ss_handle;
    /** Stable Storage Snapshot list */
    orte_sstore_base_global_snapshot_info_t *ss_snapshot;

    /** Target Directory */
    char * target_dir;
    /** Command Line */
    char * cmdline;
    /** State of operation if checkpointing */
    opal_crs_state_type_t cr_state;
    /** Checkpointing? */
    bool checkpointing;
    /** Restarting? */
    bool restarting;

    /** Migrating? */
    bool migrating;
    /** Migration bookkeeping: a count and a pointer array of migrating
     *  processes (num_migrating is presumably the number of entries in
     *  migrating_procs -- confirm) */
    int num_migrating;
    opal_pointer_array_t migrating_procs;
};
typedef struct orte_snapc_base_quiesce_1_0_0_t orte_snapc_base_quiesce_1_0_0_t;
typedef struct orte_snapc_base_quiesce_1_0_0_t orte_snapc_base_quiesce_t;

/* Class declaration for the quiesce descriptor object type */
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_snapc_base_quiesce_t);

/**
 * Application request for a global checkpoint related operation.
 *
 * ORTE_SNAPC_OP_NONE is explicitly 0; the remaining enumerators take the
 * following consecutive values in declaration order.
 */
typedef enum {
    ORTE_SNAPC_OP_NONE = 0,
    ORTE_SNAPC_OP_INIT,
    ORTE_SNAPC_OP_FIN,
    ORTE_SNAPC_OP_FIN_ACK,
    ORTE_SNAPC_OP_CHECKPOINT,
    ORTE_SNAPC_OP_RESTART,
    ORTE_SNAPC_OP_MIGRATE,
    ORTE_SNAPC_OP_QUIESCE_START,
    ORTE_SNAPC_OP_QUIESCE_CHECKPOINT,
    ORTE_SNAPC_OP_QUIESCE_END
} orte_snapc_base_request_op_event_t;

/**
 * Descriptor for a checkpoint related operation request.
 *
 * A pointer to this structure is the sole argument of the
 * orte_snapc_base_request_op_fn_t module function declared later
 * in this file.
 */
struct orte_snapc_base_request_op_1_0_0_t {
    /** Parent is an object type */
    opal_object_t super;

    /** Event to request */
    orte_snapc_base_request_op_event_t event;

    /** Is this request still active */
    bool is_active;

    /** Leader of the operation */
    int leader;

    /** Sequence Number */
    int seq_num;

    /** Global Handle */
    char * global_handle;

    /** Stable Storage Handle */
    orte_sstore_base_handle_t ss_handle;

    /** Migrating vpid list of participants
     *  (mig_num is presumably the length of the mig_* arrays -- confirm) */
    int mig_num;
    int *mig_vpids;

    /** Migrating hostname preference list: pointer to fixed-size
     *  character arrays of OPAL_MAX_PROCESSOR_NAME bytes each */
    char (*mig_host_pref)[OPAL_MAX_PROCESSOR_NAME];

    /** Migrating vpid preference list */
    int *mig_vpid_pref;

    /** Info key
     *  (NOTE(review): by its name, likely per-process "migrate off node"
     *  flags -- confirm against users of this field) */
    int *mig_off_node;
};
typedef struct orte_snapc_base_request_op_1_0_0_t orte_snapc_base_request_op_1_0_0_t;
typedef struct orte_snapc_base_request_op_1_0_0_t orte_snapc_base_request_op_t;

/* Class declaration for the request-operation object type */
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_snapc_base_request_op_t);

/**
 * Module initialization function.
 *
 * @param seed NOTE(review): presumably true when running as the HNP
 *             (global coordinator) -- confirm against components
 * @param app  NOTE(review): presumably true when running inside an
 *             application process -- confirm against components
 *
 * Returns ORTE_SUCCESS
 */
typedef int (*orte_snapc_base_module_init_fn_t)
     (bool seed, bool app);

/**
 * Module finalization function.
 * Takes no arguments.
 *
 * Returns ORTE_SUCCESS
 */
typedef int (*orte_snapc_base_module_finalize_fn_t)
     (void);

/**
 * Setup the necessary structures for this job
 *
 * @param jobid Identifier of the job to set up
 *
 * Returns ORTE_SUCCESS
 */
typedef int (*orte_snapc_base_setup_job_fn_t)
     (orte_jobid_t jobid);

/**
 * Release this job
 * (NOTE(review): presumably releases the structures created by the
 * setup_job function for the same jobid -- confirm against components)
 *
 * @param jobid Identifier of the job to release
 *
 * Returns ORTE_SUCCESS
 */
typedef int (*orte_snapc_base_release_job_fn_t)
     (orte_jobid_t jobid);


/**
 * Handle fault tolerance updates
 *
 * @param[in] state Fault tolerance state update
 *
 * @retval ORTE_SUCCESS The operation completed successfully
 * @retval ORTE_ERROR   An unspecified error occurred
 */
typedef int  (*orte_snapc_base_ft_event_fn_t)(int state);

/**
 * Start a checkpoint originating from an internal source.
 *
 * This really only makes sense to call from an application, but in the future
 * we may allow the checkpoint operation to use this function from the local
 * coordinator.
 *
 * @param[in,out] datum Quiesce descriptor for this checkpoint operation.
 *                      NOTE(review): the epoch number associated with the
 *                      operation is presumably returned in datum->epoch --
 *                      confirm against components.
 * Returns ORTE_SUCCESS
 */
typedef int (*orte_snapc_base_start_checkpoint_fn_t)
    (orte_snapc_base_quiesce_t *datum);

/**
 * Signal end of checkpoint epoch originating from an internal source.
 *
 * @param[in] datum Quiesce descriptor for this checkpoint operation.
 *                  NOTE(review): the epoch number being ended is presumably
 *                  read from datum->epoch -- confirm against components.
 * Returns ORTE_SUCCESS
 */
typedef int (*orte_snapc_base_end_checkpoint_fn_t)
    (orte_snapc_base_quiesce_t *datum);

/**
 * Request a checkpoint related operation to take place
 *
 * @param datum Request descriptor; its event field names the operation
 *              (one of the orte_snapc_base_request_op_event_t values)
 *
 * @return An int status code (NOTE(review): presumably ORTE_SUCCESS on
 *         success, as for the other module functions -- confirm)
 */
typedef int (*orte_snapc_base_request_op_fn_t)
    (orte_snapc_base_request_op_t *datum);

/**
 * Structure for SNAPC components.
 *
 * Begins with the MCA base component and base data members, followed by
 * SNAPC-specific settings. Exposed under the versioned name
 * orte_snapc_base_component_2_0_0_t and the unversioned name
 * orte_snapc_base_component_t.
 */
struct orte_snapc_base_component_2_0_0_t {
    /** MCA base component */
    mca_base_component_t base_version;
    /** MCA base data */
    mca_base_component_data_t base_data;

    /** Verbosity Level */
    int verbose;
    /** Output Handle for opal_output */
    int output_handle;
    /** Default Priority */
    int priority;
};
typedef struct orte_snapc_base_component_2_0_0_t orte_snapc_base_component_2_0_0_t;
typedef struct orte_snapc_base_component_2_0_0_t orte_snapc_base_component_t;

/**
 * Structure for SNAPC modules
 *
 * A table of function pointers, one per module operation. Exposed under
 * the versioned name orte_snapc_base_module_1_0_0_t and the unversioned
 * name orte_snapc_base_module_t.
 */
struct orte_snapc_base_module_1_0_0_t {
    /** Initialization Function */
    orte_snapc_base_module_init_fn_t           snapc_init;
    /** Finalization Function */
    orte_snapc_base_module_finalize_fn_t       snapc_finalize;
    /** Setup structures for a job */
    orte_snapc_base_setup_job_fn_t             setup_job;
    /** Release job */
    orte_snapc_base_release_job_fn_t           release_job;
    /** Handle any FT Notifications */
    orte_snapc_base_ft_event_fn_t              ft_event;
    /** Handle internal request for checkpoint: start of the checkpoint */
    orte_snapc_base_start_checkpoint_fn_t      start_ckpt;
    /** Handle internal request for checkpoint: end of the checkpoint epoch */
    orte_snapc_base_end_checkpoint_fn_t        end_ckpt;
    /** Handle a checkpoint related request */
    orte_snapc_base_request_op_fn_t            request_op;
};
typedef struct orte_snapc_base_module_1_0_0_t orte_snapc_base_module_1_0_0_t;
typedef struct orte_snapc_base_module_1_0_0_t orte_snapc_base_module_t;

/** SNAPC module function table, declared here and defined elsewhere */
ORTE_DECLSPEC extern orte_snapc_base_module_t orte_snapc;
/** SNAPC component instance, defined elsewhere
 *  (by its name, the component selected at runtime -- confirm) */
ORTE_DECLSPEC extern orte_snapc_base_component_t orte_snapc_base_selected_component;

/**
 * Macro for use in components that are of type SNAPC
 *
 * Expands to ORTE_MCA_BASE_VERSION_2_1_0 with the framework name "snapc"
 * and the version triple 2, 0, 0.
 */
#define ORTE_SNAPC_BASE_VERSION_2_0_0 \
    ORTE_MCA_BASE_VERSION_2_1_0("snapc", 2, 0, 0)

END_C_DECLS

#endif /* MCA_SNAPC_H */

